import json
import os
import time

import tqdm

from ai.utils import utils_file
from assist_utils import *
import threading
from ai import AiConstant
import jsonlines

# Sample comments-page URL for subject 33447633 (offset 120, 20 comments per page).
# Written as a plain string: the only interpolated value was the literal 120,
# so an f-string added nothing and made the constant look like a template.
# NOTE: the functions in this file that use a `url` assign their own local one;
# they do not read this module-level value.
url = "https://movie.douban.com/subject/33447633/comments?start=120&limit=20&status=P&sort=new_score"

# Browser-like HTTP request headers. Passed as `headers=` to send_request() by
# get_comment_and_title_from_url when fetching movie.douban.com comment pages.
# NOTE(review): the "Cookie" value looks like a logged-in session cookie committed
# to source control — confirm, and consider loading it from the environment or
# an untracked config file instead.
header = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Cookie": 'll="118371"; bid=Fsbe9sfmWeE; douban-fav-remind=1; _pk_id.100001.4cf6=418738468fffe252.1687100016.; __yadk_uid=QALpMKXECMQyz7KgtiAoZB3f9sDCr7wk; _vwo_uuid_v2=D22F58820969D590BFCDCC8325B7D22D5|2ef8c52d900cdee29e6c7a70634e2fd9; viewed="35121203_36142067_36206397"; _ga=GA1.1.1410860224.1696751048; _ga_Y4GN1R87RG=GS1.1.1696751048.1.1.1696751067.0.0.0; ap_v=0,6.0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1696767285%2C%22https%3A%2F%2Fm.douban.com%2F%22%5D; _pk_ses.100001.4cf6=1; ct=y; dbcl2="231323398:RLeLw+DqlGM"; ck=pxhW; push_noty_num=0; push_doumail_num=0',
    # Alternative cookie value, currently disabled (as written, its nested double
    # quotes would not parse if uncommented):
    # "Cookie":"ll="118371"; bid=Fsbe9sfmWeE; douban-fav-remind=1; _pk_id.100001.4cf6=418738468fffe252.1687100016.; __yadk_uid=QALpMKXECMQyz7KgtiAoZB3f9sDCr7wk; _vwo_uuid_v2=D22F58820969D590BFCDCC8325B7D22D5|2ef8c52d900cdee29e6c7a70634e2fd9; viewed="35121203_36142067_36206397"; _ga=GA1.1.1410860224.1696751048; _ga_Y4GN1R87RG=GS1.1.1696751048.1.1.1696751067.0.0.0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1696751069%2C%22https%3A%2F%2Fm.douban.com%2F%22%5D; _pk_ses.100001.4cf6=1; ap_v=0,6.0",
    "Host": "movie.douban.com",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
    # Disabled: the value contains unescaped double quotes, so this entry would
    # be a syntax error if uncommented as-is.
    # "sec-ch-ua":"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "Windows",
}
# Minimal header set carrying only a "Cookie" entry (appears to be the same
# value as header["Cookie"] — verify before relying on that).
# Not referenced by any code visible in this file.
header2 = {
    "Cookie": 'll="118371"; bid=Fsbe9sfmWeE; douban-fav-remind=1; _pk_id.100001.4cf6=418738468fffe252.1687100016.; __yadk_uid=QALpMKXECMQyz7KgtiAoZB3f9sDCr7wk; _vwo_uuid_v2=D22F58820969D590BFCDCC8325B7D22D5|2ef8c52d900cdee29e6c7a70634e2fd9; viewed="35121203_36142067_36206397"; _ga=GA1.1.1410860224.1696751048; _ga_Y4GN1R87RG=GS1.1.1696751048.1.1.1696751067.0.0.0; ap_v=0,6.0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1696767285%2C%22https%3A%2F%2Fm.douban.com%2F%22%5D; _pk_ses.100001.4cf6=1; ct=y; dbcl2="231323398:RLeLw+DqlGM"; ck=pxhW; push_noty_num=0; push_doumail_num=0',
}

# Output file that get_comment_and_title_from_url appends records to
# (opened with jsonlines, i.e. one JSON object per line).
SAVE_PATH = "./data.jsonl"
# NOTE(review): the four settings below are not read by any code visible in
# this file — confirm whether they are still needed.
PAGE_SIZE = 30
SORT_TYPE = 0  # 0: ascending order, 1: descending order
SAVE_WITH_TITLE = True
SAVE_WITH_ALBUM_ID = True


def get_comment_and_title_from_url(start: int, limit: int = 20, subject_id: str = "33447633"):
    """
    Fetch one page of Douban comments and append them to SAVE_PATH.

    The comments page is requested with the module-level ``header``, parsed
    with ``text2special_file``, and two XPath queries are run against the
    result: one for the comment texts and one for a ``title`` attribute in
    each comment's heading. The two result lists are paired by position and
    every pair is appended to the JSON Lines file at ``SAVE_PATH`` as
    ``{"title": ..., "comment": ...}``.

    Args:
        start: Value of the ``start`` query parameter (offset of the page).
        limit: Value of the ``limit`` query parameter (comments per page).
        subject_id: Douban subject id placed in the URL path. Defaults to the
            subject this script originally targeted.

    Returns:
        The list of records found on the page (empty if the page had none),
        or ``None`` when ``send_request`` or ``text2special_file`` returned
        ``None``.

    Raises:
        AssertionError: If the two XPath queries return a different number of
            items, because pairing them by position would then mislabel
            comments.
    """
    url = f"https://movie.douban.com/subject/{subject_id}/comments?start={start}&limit={limit}&status=P&sort=new_score"
    response = send_request(url, headers=header)
    if response is None:
        return None
    logger.info('加载成功, 开始处理')

    tree = text2special_file(response.text)
    if tree is None:
        return None

    comments = tree.xpath('//*[@id="comments"]/div/div[2]/p/span/text()')
    titles = tree.xpath('//*[@id="comments"]/div/div[2]/h3/span[2]/span[2]/@title')

    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type is kept for existing callers.
    if len(comments) != len(titles):
        raise AssertionError(
            f"comment/title count mismatch: {len(comments)} comments, {len(titles)} titles ({url})"
        )

    records = [
        {"title": title, "comment": comment}
        for comment, title in zip(comments, titles)
    ]

    # Open the output once per page instead of once per record. Skip the open
    # entirely for an empty page so no empty file is created.
    if records:
        with jsonlines.open(SAVE_PATH, mode='a') as writer:
            writer.write_all(records)
    return records


def get_ct_from_movie(num_sample: int = 120, page_size: int = 20):
    """
    Download comments page by page via get_comment_and_title_from_url.

    Requests offsets ``0, page_size, 2 * page_size, ...`` strictly below
    ``num_sample``, asking for ``page_size`` comments each time. With the
    defaults this issues the same six requests (offsets 0 to 100, 20 per
    page) as the previously hard-coded loop.

    Args:
        num_sample: Exclusive upper bound for the page offsets.
        page_size: Step between offsets, also passed as the page ``limit``.

    Raises:
        ValueError: If ``page_size`` is 0 (raised by ``range``).
    """
    for start in range(0, num_sample, page_size):
        # Pass the page size explicitly so offset step and page limit stay in sync.
        get_comment_and_title_from_url(start, page_size)


def get_movies(movie_num: int = 600, max_stalled_clicks: int = 5):
    """
    Collect movie links from the Douban "explore" page.

    Loads the page with ``get_full_html_from_url``, dumps the first HTML
    snapshot to ``gxl.html``, and gathers the ``href`` values matched by the
    list-item XPath. It then repeatedly clicks the button matched by
    ``xpath_next`` and gathers again until enough distinct links are found.

    Args:
        movie_num: Stop once at least this many distinct links are collected.
        max_stalled_clicks: Give up after this many consecutive clicks that
            add no new link. Without this bound the loop would never end once
            the page stops producing new entries.

    Returns:
        The set of collected ``href`` values. It may hold fewer than
        ``movie_num`` items if the page stalled.
    """
    url = "https://movie.douban.com/explore"
    xpath_a = '/html/body/div[3]/div[1]/div/div[1]/div/div/div[2]/ul/li/a/@href'
    xpath_next = '/html/body/div[3]/div[1]/div/div[1]/div/div/div[2]/div/button'

    res = set()
    # NOTE(review): `driver` is never explicitly released here — confirm whether
    # the assist_utils helpers own its lifetime or it should be closed.
    html_text, driver = get_full_html_from_url(url)
    write_to_html(html_text, 'gxl.html')
    res.update(handle_xpath(html_text, xpath_a))
    logger.info(f'处理专辑成功,len(res): {len(res)}')

    stalled_clicks = 0
    while len(res) < movie_num and stalled_clicks < max_stalled_clicks:
        count_before = len(res)
        html_text, driver = do_a_click_to_a_driver(driver, xpath_next)
        res.update(handle_xpath(html_text, xpath_a))
        logger.info(f'处理专辑成功,len(res): {len(res)}')
        # Reset on progress: only *consecutive* fruitless clicks end the loop.
        stalled_clicks = stalled_clicks + 1 if len(res) == count_before else 0

    # Return the links; previously they were collected and then discarded.
    return res


def main():
    """Script entry point: runs get_movies()."""
    get_movies()


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
